Dropping cols where: grade is missing 90000 -> 89996
With transform="pandas", `func` should return a DataFrame to follow the set_output API.
Total samples loaded : 89996
Pipeline(steps=[('preprocessing',
FunctionTransformer(func=<function convert_to_category at 0x7f1aa69abd00>)),
('feat_trans_loan_grade', LoanGradeTransformer()),
('feat_trans_dti_inc_joint', JointApplicationTransformer()),
('feat_trans_fico_score', FICOScoreTransformer()),
('feat_trans_delinquency', DelinquencyTransformer()),
('feat_trans_inst_income_ratio', Ins...
feature_types=None, gamma=0.1, grow_policy=None,
importance_type=None,
interaction_constraints=None, learning_rate=0.3,
max_bin=None, max_cat_threshold=None,
max_cat_to_onehot=None, max_delta_step=None,
max_depth=7, max_leaves=None,
min_child_weight=2.5, missing=nan,
monotone_constraints=None, multi_strategy=None,
n_estimators=150, n_jobs=None,
num_parallel_tree=None,
objective='multi:softprob', ...))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Pipeline(steps=[('preprocessing',
FunctionTransformer(func=<function convert_to_category at 0x7f1aa69abd00>)),
('feat_trans_loan_grade', LoanGradeTransformer()),
('feat_trans_dti_inc_joint', JointApplicationTransformer()),
('feat_trans_fico_score', FICOScoreTransformer()),
('feat_trans_delinquency', DelinquencyTransformer()),
('feat_trans_inst_income_ratio', Ins...
feature_types=None, gamma=0.1, grow_policy=None,
importance_type=None,
interaction_constraints=None, learning_rate=0.3,
max_bin=None, max_cat_threshold=None,
max_cat_to_onehot=None, max_delta_step=None,
max_depth=7, max_leaves=None,
min_child_weight=2.5, missing=nan,
monotone_constraints=None, multi_strategy=None,
n_estimators=150, n_jobs=None,
num_parallel_tree=None,
objective='multi:softprob', ...))])FunctionTransformer(func=<function convert_to_category at 0x7f1aa69abd00>)
LoanGradeTransformer()
JointApplicationTransformer()
FICOScoreTransformer()
DelinquencyTransformer()
InstallementIncomeRatio()
NewDtiTransformer()
DummyDropAllButFICOHigh(option=<Options.OFF: 0>)
FunctionTransformer(func=<function get_pipeline.<locals>.remove_columns_with_prefix at 0x7f1aa4fc6e60>,
kw_args={'prefix': 'target__'})XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device='cpu', early_stopping_rounds=None,
enable_categorical=True, eval_metric=None, feature_types=None,
gamma=0.1, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.3, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=7, max_leaves=None,
min_child_weight=2.5, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=150, n_jobs=None,
num_parallel_tree=None, objective='multi:softprob', ...)<Figure size 1100x900 with 0 Axes>
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior. _warn_prf(average, modifier, msg_start, len(result))
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.200921 |
| 1 | emp_length_parser__emp_length | 0.000000 |
| 2 | zip__zip_code | 0.014028 |
| 3 | pass__loan_amnt | 0.019779 |
| 4 | pass__installment | 0.028261 |
| 5 | pass__home_ownership | 0.000000 |
| 6 | pass__annual_inc | 0.010391 |
| 7 | pass__verification_status | 0.052370 |
| 8 | pass__purpose | 0.033422 |
| 9 | pass__addr_state | 0.000000 |
| 10 | pass__dti | 0.016340 |
| 11 | pass__delinq_2yrs | 0.000000 |
| 12 | pass__fico_range_low | 0.203827 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.041348 |
| 15 | pass__mths_since_last_delinq | 0.000000 |
| 16 | pass__mths_since_last_record | 0.000000 |
| 17 | pass__open_acc | 0.000000 |
| 18 | pass__pub_rec | 0.000000 |
| 19 | pass__revol_bal | 0.020648 |
| 20 | pass__revol_util | 0.038275 |
| 21 | pass__total_acc | 0.028199 |
| 22 | pass__initial_list_status | 0.046777 |
| 23 | pass__last_fico_range_high | 0.089089 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.000000 |
| 26 | pass__mths_since_last_major_derog | 0.000000 |
| 27 | pass__application_type | 0.000000 |
| 28 | pass__verification_status_joint | 0.000000 |
| 29 | pass__inq_fi | 0.015884 |
| 30 | pass__inq_last_12m | 0.017836 |
| 31 | pass__chargeoff_within_12_mths | 0.000000 |
| 32 | pass__mort_acc | 0.016369 |
| 33 | pass__pub_rec_bankruptcies | 0.000000 |
| 34 | pass__tax_liens | 0.000000 |
| 35 | pass__tot_hi_cred_lim | 0.019794 |
| 36 | pass__total_bal_ex_mort | 0.000970 |
| 37 | installment_income_ratio | 0.053909 |
| 38 | new_dti | 0.031560 |
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.271462 |
| 1 | emp_length_parser__emp_length | 0.008289 |
| 2 | zip__zip_code | 0.106345 |
| 3 | pass__loan_amnt | 0.061711 |
| 4 | pass__installment | 0.046212 |
| 5 | pass__home_ownership | 0.008468 |
| 6 | pass__annual_inc | 0.012985 |
| 7 | pass__verification_status | 0.016868 |
| 8 | pass__purpose | 0.023965 |
| 9 | pass__addr_state | 0.015981 |
| 10 | pass__dti | 0.015480 |
| 11 | pass__delinq_2yrs | 0.011471 |
| 12 | pass__fico_range_low | 0.085221 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.023751 |
| 15 | pass__mths_since_last_delinq | 0.009172 |
| 16 | pass__mths_since_last_record | 0.010692 |
| 17 | pass__open_acc | 0.008931 |
| 18 | pass__pub_rec | 0.009866 |
| 19 | pass__revol_bal | 0.010371 |
| 20 | pass__revol_util | 0.010980 |
| 21 | pass__total_acc | 0.010002 |
| 22 | pass__initial_list_status | 0.048485 |
| 23 | pass__last_fico_range_high | 0.020814 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.013943 |
| 26 | pass__mths_since_last_major_derog | 0.009580 |
| 27 | pass__application_type | 0.013948 |
| 28 | pass__verification_status_joint | 0.014838 |
| 29 | pass__inq_fi | 0.014440 |
| 30 | pass__inq_last_12m | 0.014292 |
| 31 | pass__chargeoff_within_12_mths | 0.013176 |
| 32 | pass__mort_acc | 0.010901 |
| 33 | pass__pub_rec_bankruptcies | 0.012358 |
| 34 | pass__tax_liens | 0.013203 |
| 35 | pass__tot_hi_cred_lim | 0.012728 |
| 36 | pass__total_bal_ex_mort | 0.009067 |
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.186552 |
| 1 | emp_length_parser__emp_length | 0.008174 |
| 2 | zip__zip_code | 0.066410 |
| 3 | pass__loan_amnt | 0.069225 |
| 4 | pass__installment | 0.054875 |
| 5 | pass__home_ownership | 0.008708 |
| 6 | pass__annual_inc | 0.017367 |
| 7 | pass__verification_status | 0.024303 |
| 8 | pass__purpose | 0.027819 |
| 9 | pass__addr_state | 0.014011 |
| 10 | pass__dti | 0.019013 |
| 11 | pass__delinq_2yrs | 0.010187 |
| 12 | pass__fico_range_low | 0.124549 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.032111 |
| 15 | pass__mths_since_last_delinq | 0.009185 |
| 16 | pass__mths_since_last_record | 0.010386 |
| 17 | pass__open_acc | 0.008975 |
| 18 | pass__pub_rec | 0.007683 |
| 19 | pass__revol_bal | 0.011625 |
| 20 | pass__revol_util | 0.013283 |
| 21 | pass__total_acc | 0.011336 |
| 22 | pass__initial_list_status | 0.055525 |
| 23 | pass__last_fico_range_high | 0.029334 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.011566 |
| 26 | pass__mths_since_last_major_derog | 0.010041 |
| 27 | pass__application_type | 0.018512 |
| 28 | pass__annual_inc_joint | 0.011335 |
| 29 | pass__dti_joint | 0.017017 |
| 30 | pass__verification_status_joint | 0.013634 |
| 31 | pass__inq_fi | 0.017480 |
| 32 | pass__inq_last_12m | 0.016551 |
| 33 | pass__chargeoff_within_12_mths | 0.006341 |
| 34 | pass__mort_acc | 0.012275 |
| 35 | pass__pub_rec_bankruptcies | 0.012554 |
| 36 | pass__tax_liens | 0.008045 |
| 37 | pass__tot_hi_cred_lim | 0.014740 |
| 38 | pass__total_bal_ex_mort | 0.009274 |
While the performance for grades A, B and C is relatively acceptable (F1 > ~0.8), performance when predicting the lower grades is very poor (especially for grade G, which is almost never classified correctly).
As we learnt when building our default risk model, returns and other features do not differ as much between the lower-quality grades, which might make them hard to distinguish. Therefore, we'll use the same approach and merge the E, F and G grades into a single group.
Merging E-F-G loan grades into a single group¶
Training: XGBoostF1Multiclass with: {'feat_trans_delinquency__option': 0, 'feat_trans_dti_inc_joint__option': 0, 'feat_trans_dummy_DROP_ALL_BUT_FICO_HIGH__option': 0, 'feat_trans_fico_score__option': 0, 'feat_trans_inst_income_ratio__option': 0, 'feat_trans_loan_grade__option': 0, 'feat_trans_new_dti_after_loan__option': 0, 'model__n_estimators': 150, 'model__min_child_weight': 2.5, 'model__max_depth': 7, 'model__learning_rate': 0.3, 'model__gamma': 0.1}
XGBoostF1Multiclass: 60.8 seconds
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.200921 |
| 1 | emp_length_parser__emp_length | 0.000000 |
| 2 | zip__zip_code | 0.014028 |
| 3 | pass__loan_amnt | 0.019779 |
| 4 | pass__installment | 0.028261 |
| 5 | pass__home_ownership | 0.000000 |
| 6 | pass__annual_inc | 0.010391 |
| 7 | pass__verification_status | 0.052370 |
| 8 | pass__purpose | 0.033422 |
| 9 | pass__addr_state | 0.000000 |
| 10 | pass__dti | 0.016340 |
| 11 | pass__delinq_2yrs | 0.000000 |
| 12 | pass__fico_range_low | 0.203827 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.041348 |
| 15 | pass__mths_since_last_delinq | 0.000000 |
| 16 | pass__mths_since_last_record | 0.000000 |
| 17 | pass__open_acc | 0.000000 |
| 18 | pass__pub_rec | 0.000000 |
| 19 | pass__revol_bal | 0.020648 |
| 20 | pass__revol_util | 0.038275 |
| 21 | pass__total_acc | 0.028199 |
| 22 | pass__initial_list_status | 0.046777 |
| 23 | pass__last_fico_range_high | 0.089089 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.000000 |
| 26 | pass__mths_since_last_major_derog | 0.000000 |
| 27 | pass__application_type | 0.000000 |
| 28 | pass__verification_status_joint | 0.000000 |
| 29 | pass__inq_fi | 0.015884 |
| 30 | pass__inq_last_12m | 0.017836 |
| 31 | pass__chargeoff_within_12_mths | 0.000000 |
| 32 | pass__mort_acc | 0.016369 |
| 33 | pass__pub_rec_bankruptcies | 0.000000 |
| 34 | pass__tax_liens | 0.000000 |
| 35 | pass__tot_hi_cred_lim | 0.019794 |
| 36 | pass__total_bal_ex_mort | 0.000970 |
| 37 | installment_income_ratio | 0.053909 |
| 38 | new_dti | 0.031560 |
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.271462 |
| 1 | emp_length_parser__emp_length | 0.008289 |
| 2 | zip__zip_code | 0.106345 |
| 3 | pass__loan_amnt | 0.061711 |
| 4 | pass__installment | 0.046212 |
| 5 | pass__home_ownership | 0.008468 |
| 6 | pass__annual_inc | 0.012985 |
| 7 | pass__verification_status | 0.016868 |
| 8 | pass__purpose | 0.023965 |
| 9 | pass__addr_state | 0.015981 |
| 10 | pass__dti | 0.015480 |
| 11 | pass__delinq_2yrs | 0.011471 |
| 12 | pass__fico_range_low | 0.085221 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.023751 |
| 15 | pass__mths_since_last_delinq | 0.009172 |
| 16 | pass__mths_since_last_record | 0.010692 |
| 17 | pass__open_acc | 0.008931 |
| 18 | pass__pub_rec | 0.009866 |
| 19 | pass__revol_bal | 0.010371 |
| 20 | pass__revol_util | 0.010980 |
| 21 | pass__total_acc | 0.010002 |
| 22 | pass__initial_list_status | 0.048485 |
| 23 | pass__last_fico_range_high | 0.020814 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.013943 |
| 26 | pass__mths_since_last_major_derog | 0.009580 |
| 27 | pass__application_type | 0.013948 |
| 28 | pass__verification_status_joint | 0.014838 |
| 29 | pass__inq_fi | 0.014440 |
| 30 | pass__inq_last_12m | 0.014292 |
| 31 | pass__chargeoff_within_12_mths | 0.013176 |
| 32 | pass__mort_acc | 0.010901 |
| 33 | pass__pub_rec_bankruptcies | 0.012358 |
| 34 | pass__tax_liens | 0.013203 |
| 35 | pass__tot_hi_cred_lim | 0.012728 |
| 36 | pass__total_bal_ex_mort | 0.009067 |
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.186552 |
| 1 | emp_length_parser__emp_length | 0.008174 |
| 2 | zip__zip_code | 0.066410 |
| 3 | pass__loan_amnt | 0.069225 |
| 4 | pass__installment | 0.054875 |
| 5 | pass__home_ownership | 0.008708 |
| 6 | pass__annual_inc | 0.017367 |
| 7 | pass__verification_status | 0.024303 |
| 8 | pass__purpose | 0.027819 |
| 9 | pass__addr_state | 0.014011 |
| 10 | pass__dti | 0.019013 |
| 11 | pass__delinq_2yrs | 0.010187 |
| 12 | pass__fico_range_low | 0.124549 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.032111 |
| 15 | pass__mths_since_last_delinq | 0.009185 |
| 16 | pass__mths_since_last_record | 0.010386 |
| 17 | pass__open_acc | 0.008975 |
| 18 | pass__pub_rec | 0.007683 |
| 19 | pass__revol_bal | 0.011625 |
| 20 | pass__revol_util | 0.013283 |
| 21 | pass__total_acc | 0.011336 |
| 22 | pass__initial_list_status | 0.055525 |
| 23 | pass__last_fico_range_high | 0.029334 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.011566 |
| 26 | pass__mths_since_last_major_derog | 0.010041 |
| 27 | pass__application_type | 0.018512 |
| 28 | pass__annual_inc_joint | 0.011335 |
| 29 | pass__dti_joint | 0.017017 |
| 30 | pass__verification_status_joint | 0.013634 |
| 31 | pass__inq_fi | 0.017480 |
| 32 | pass__inq_last_12m | 0.016551 |
| 33 | pass__chargeoff_within_12_mths | 0.006341 |
| 34 | pass__mort_acc | 0.012275 |
| 35 | pass__pub_rec_bankruptcies | 0.012554 |
| 36 | pass__tax_liens | 0.008045 |
| 37 | pass__tot_hi_cred_lim | 0.014740 |
| 38 | pass__total_bal_ex_mort | 0.009274 |
Classifying Subgrades¶
Dropping cols where: sub_grade is missing 90000 -> 89996
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/preprocessing/_function_transformer.py:345: UserWarning: With transform="pandas", `func` should return a DataFrame to follow the set_output API. warnings.warn(
Total samples loaded : 89996
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:2922: UserWarning: The y_pred values do not sum to one. Starting from 1.5 thiswill result in an error. warnings.warn(
Training: XGBoostF1Multiclass with: {'feat_trans_delinquency__option': 0, 'feat_trans_dti_inc_joint__option': 0, 'feat_trans_dummy_DROP_ALL_BUT_FICO_HIGH__option': 0, 'feat_trans_fico_score__option': 0, 'feat_trans_inst_income_ratio__option': 0, 'feat_trans_loan_grade__option': 0, 'feat_trans_new_dti_after_loan__option': 0, 'model__n_estimators': 150, 'model__min_child_weight': 2.5, 'model__max_depth': 7, 'model__learning_rate': 0.3, 'model__gamma': 0.1}
XGBoostF1Multiclass: 293.7 seconds
Predicting Sub-grades¶
We've attempted to build a model which predicts individual sub-grades (e.g. A1, A2 ... G5) in addition to top-level grades. An XGBoost multi-class classification model was used; however, the performance was unsatisfactory. We've included the overall performance summary and feature importances below. However, we decided not to provide an in-depth analysis because the model would not be useful for any practical applications (a different approach would probably suit this problem better due to the high number of classes and their ordinal nature).
XGBoostF1Multiclass
{'f1': 0.199, 'accuracy': 0.2733, 'precision': 0.2108, 'recall': 0.1947, 'log_loss': 2}
None
/home/paulius/data/projects/m3_s3_lending/shared/graph.py:457: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator. ax.set_yticklabels(
| Feature | Importance | |
|---|---|---|
| 0 | term_parser__term | 0.106485 |
| 1 | emp_length_parser__emp_length | 0.016260 |
| 2 | zip__zip_code | 0.063670 |
| 3 | pass__loan_amnt | 0.048561 |
| 4 | pass__installment | 0.046285 |
| 5 | pass__home_ownership | 0.017024 |
| 6 | pass__annual_inc | 0.020558 |
| 7 | pass__verification_status | 0.024662 |
| 8 | pass__purpose | 0.026665 |
| 9 | pass__addr_state | 0.025046 |
| 10 | pass__dti | 0.021775 |
| 11 | pass__delinq_2yrs | 0.017929 |
| 12 | pass__fico_range_low | 0.065871 |
| 13 | pass__fico_range_high | 0.000000 |
| 14 | pass__inq_last_6mths | 0.027946 |
| 15 | pass__mths_since_last_delinq | 0.017716 |
| 16 | pass__mths_since_last_record | 0.019174 |
| 17 | pass__open_acc | 0.016289 |
| 18 | pass__pub_rec | 0.020212 |
| 19 | pass__revol_bal | 0.018316 |
| 20 | pass__revol_util | 0.019093 |
| 21 | pass__total_acc | 0.017927 |
| 22 | pass__initial_list_status | 0.033912 |
| 23 | pass__last_fico_range_high | 0.024567 |
| 24 | pass__last_fico_range_low | 0.000000 |
| 25 | pass__collections_12_mths_ex_med | 0.016248 |
| 26 | pass__mths_since_last_major_derog | 0.017968 |
| 27 | pass__application_type | 0.026973 |
| 28 | pass__annual_inc_joint | 0.021111 |
| 29 | pass__dti_joint | 0.022676 |
| 30 | pass__verification_status_joint | 0.022423 |
| 31 | pass__inq_fi | 0.023245 |
| 32 | pass__inq_last_12m | 0.022318 |
| 33 | pass__chargeoff_within_12_mths | 0.018417 |
| 34 | pass__mort_acc | 0.017800 |
| 35 | pass__pub_rec_bankruptcies | 0.020849 |
| 36 | pass__tax_liens | 0.016260 |
| 37 | pass__tot_hi_cred_lim | 0.020096 |
| 38 | pass__total_bal_ex_mort | 0.017674 |